# NOTE(review): this removes every object that ls() lists in the environment
# the script is run in (the global workspace when sourced interactively).
# Confirm that is intended before sourcing from a session with unsaved work.
rm(list = ls())

# NOTE(review): no ggplot2 function is called in the visible part of this
# script; the dependency may be a leftover -- confirm before removing.
library(ggplot2)

# Purpose: For every topic model, calculate the mean and standard deviation
# of semantic coherence and of FREX, and save the results as .RData files.

### PATHS ##############################
# Directory holding the semantic_coherence_model_*.csv input files.
# Used as a raw paste0() prefix, so the trailing slash is required.
semantic.coherence.path <- "./data/"
# Directory the avg_coherence_FREX_*.RData result files are written to.
# Also used as a raw paste0() prefix, so keep the trailing slash.
out.path <- "./data/"
#######################################

# For every FREX weight and every top-N word cutoff, summarise semantic
# coherence and FREX for each topic model and save one summary table (`avg`)
# per (weight, cutoff) combination.
#
# Files read per model <m>, cutoff <N> and weight <weight>:
#   <semantic.coherence.path>semantic_coherence_model_<m>_top_<N>_words.csv
#   ./data/FREX_values_<weight>/FREX_values_topic_model_<m>.csv
# File written per combination (object name inside the file: `avg`):
#   <out.path>avg_coherence_FREX_top_<N>_words_weight_<w>.RData

# Model ids to process. Both the per-model loop and the `topic` column of
# `avg` are derived from this single vector so they cannot drift apart.
topic.models <- seq(10, 40, 1)

for (weight in c("0.5", "0.7", "1.0")) {
    frex.path <- paste0("./data/FREX_values_", weight, "/")

    # File-name-safe weight label, e.g. "0.5" -> "0_5". It does not depend on
    # no.words, so it is computed once per weight.
    w <- gsub(".", "_", weight, fixed = TRUE)

    for (no.words in c(5, 10, 15, 20)) {
        # one data frame per model; preallocated instead of grown in the loop
        data <- vector("list", length(topic.models))

        # loop over models and collect coherence and average FREX per model
        cat("Calculating average semantic coherence and FREX for model\n")
        for (i in seq_along(topic.models)) {

            topic.model <- topic.models[i]
            cat(paste("... ", topic.model, "\n"))

            # open files
            fname <- paste0(semantic.coherence.path, "semantic_coherence_model_",
                            topic.model, "_top_", no.words, "_words.csv")
            coherence <- read.csv(fname)

            fname <- paste0(frex.path, "FREX_values_topic_model_",
                            topic.model, ".csv")
            frex <- read.csv(fname, sep = " ", header = FALSE)

            # Indexing past the last row would silently append all-NA rows and
            # turn every average into NA, so fail loudly instead.
            if (nrow(frex) < no.words) {
                stop("FREX file '", fname, "' has only ", nrow(frex),
                     " rows; at least ", no.words, " are required",
                     call. = FALSE)
            }

            # Keep the first no.words rows. drop = FALSE keeps a data frame
            # even when the file has a single column.
            # NOTE(review): presumably rows are words ordered by FREX rank and
            # columns are topics -- confirm against the file producer.
            frex <- frex[seq_len(no.words), , drop = FALSE]

            # column-wise mean of the kept rows, combined with the coherence
            # table read above
            data[[i]] <- data.frame(coherence,
                                    frex = vapply(frex, mean, numeric(1)))
        }

        # Mean and standard deviation per model. The `topic` column holds the
        # model id taken from topic.models.
        avg <- data.frame(
            topic = topic.models,
            mean.coherence = vapply(data, function(x) mean(x$coherence), numeric(1)),
            mean.frex = vapply(data, function(x) mean(x$frex), numeric(1)),
            sd.coherence = vapply(data, function(x) sd(x$coherence), numeric(1)),
            sd.frex = vapply(data, function(x) sd(x$frex), numeric(1))
        )

        # save data
        fname <- paste0(out.path, "avg_coherence_FREX_top_", no.words,
                        "_words_weight_", w, ".RData")
        save(avg, file = fname)
    }
}


